| nettime's_dusty_archivist on Mon, 20 Mar 2000 07:55:10 +0100 (CET) |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| <nettime> cndecode.c |
/*
* cndecode.c
* By Matthew Skala
*/
/*
* Utility to decode the Cyber Patrol 4 cyber.not file
* Usage:
* cndecode cyber.not dictionary suppdict iplist
* all arguments except the first are optional
* "dictionary" is a list (newline separated) of words for the URL-hash attack
* typically /usr/dict/words or equivalent, or use our ready-made one
* suppdict is another such list; if it's specified, it will be written to
* with the list of actual words found (saves time next run)
* iplist is lines of ip address and domain name, tab separated; it will be
* written to with any new addresses looked up (if we compile with reverse
* DNS enabled).
*/
/***************************************************************************/
/* System stuff */
/*
* Compiling notes:
* This was written under Linux on a PC, but should be portable to any 32-bit
* little-endian architecture. Since CP4 is PC-specific, that shouldn't be
* too much of a limitation. This program does require structures bigger than
* 64K, and so it might be touch-and-go on a 16-bit PC compiler. If you
* enable reverse DNS with the defines below, you will have to have reasonably
* Linux-ish (which in turn means reasonably BSD-ish) networking libraries.
* Reverse queries can take a long time. If you find this annoying, you
* can set a time limit, and then after that time expires the system will stop
* attempting reverse lookups. This is a win because the reverse lookups it
* already did are saved; next time, it'll pick up where it left off. Real
* Programmers, of course, would run multiple queries at once, but that would
* mean either splitting into several processes, multithreading, or "fake"
* multithreading with custom-written resolver routines. This way is a lot
* less stressful. (I have a perl script that spawns 40 processes to max out
* my modem, but it's a monstrosity.) The amount of CPU power required for
* CRC reversal scales exponentially with the number of characters of CRC
* reversing you choose; that also determines how accurately it'll guess for
* URL hashes that are not in the dictionary. It's probably smarter to get a
* bigger dictionary.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/* Compile-time configuration */
/* Should we attempt reverse lookups? Requires Net libraries, active
* connection, and a certain amount of time. The macro is only tested with
* #ifdef, so comment the line out (rather than defining it to 0) to turn
* reverse DNS off. */
#define REVERSE_DNS
/* After this many seconds, no more reverse DNS queries will be performed.
* Default one hour, and ignored if you turned reverse DNS off. Measured
* from the start_time recorded at the top of main(). */
#define TIME_LIMIT 3600
/* How many characters of CRC reversal? Five is essentially free, more takes
* exponentially longer time. The freebits[] and lengthmatrix[] tables have
* 12 entries and are indexed by length-1, so values above 12 are not
* supported by the tables. */
#define REVERSE_CRC_LENGTH 8
/* How many entries in the dictionary hash table? Used as the modulus when
* picking a bucket (hash%DICTHASH_SIZE). */
#define DICTHASH_SIZE 32767
/* How many entries in the IP address hash table? Used as the modulus when
* picking a bucket (ip%IPHASH_SIZE). */
#define IPHASH_SIZE 32767
/* What's the longest line length we expect to see? Passed to fgets() as the
* buffer size when reading dictionary lines, and used to size the host name
* buffer when reading the IP hints file. */
#define LINELEN 256
/* Headers we only need if we're doing reverse DNS */
#ifdef REVERSE_DNS
#include <netdb.h>
#include <sys/socket.h>
#include <time.h>
#endif
/****************************************************************************/
/* Reference tables */
/* Forward CRC polynomial table.
* 256 entries, one per possible byte value; forward_crc() indexes it with
* the low byte of the running CRC XORed with the (case-folded) input byte.
* Every entry fits in 32 bits even though the element type is unsigned long.
* Entry 128 is 0xEDB88320; the values appear to be the usual reflected
* CRC-32 lookup table. */
unsigned long crctable[256]={
0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL
};
/* This answers the question: how many freely-chosen bits do I include
* when I ask for a crc collision with a given input length?
* NOTE: callers index this table with length-1, so entry 0 is for a
* one-character string and entry 11 for a twelve-character string.
* The largest value (40) equals the number of entries in bitsfree[] and
* freematrix[]. */
char freebits[12]={0,0,0,0,1,4,10,16,22,28,34,40};
/* This says where each forced bit goes.
* Entry i is the bit position in the output string that reverse_crc()
* flips (via FLIPBITR, which counts bytes from the END of the string) when
* bit i of the solved 32-bit vector is set. No position here touches bit 5
* or bit 7 of any byte. */
char bitsforced[32]={0,1,2,3,4,6,
8,9,10,11,12,14,
16,17,18,19,20,22,
24,25,26,27,28,30,
32,33,34,35,38,
40,41,42};
/* This says where each free bit goes.
* Entry i is the output bit position (same numbering as bitsforced[])
* that reverse_crc() flips when free input bit i is set. Only the first
* freebits[length-1] entries are used for a given length. */
char bitsfree[40]={36,43,44,46,
48,49,50,51,52,54,
56,57,58,59,60,62,
64,65,66,67,68,70,
72,73,74,75,76,78,
80,81,82,83,84,86,
88,89,90,91,92,94};
/* The portion of the inverted matrix corresponding to the CRC bits.
* reverse_crc() XORs entry i into its working 32-bit vector whenever bit i
* of the target CRC is set. */
unsigned long crcmatrix[32]={
0x9BF7B4FE,0x10CEBBDB,0x3EC28E73,0xE516F5B2,
0x3EB07172,0xAC6CB91B,0x2344667F,0x25ECE58C,
0xD24109C4,0x501CB10A,0x97761211,0x0A2EF700,
0x0C806D13,0x55AE3901,0x4C147270,0xDAC3C857,
0x384B8A54,0xF7583CAD,0xA1DA1DC4,0x0028BBDC,
0xB5BB7FE3,0x99610C1A,0x1FC446C4,0x8DE0FF05,
0x01D3D128,0x64FAC9B2,0x3BC5E604,0xE564A85C,
0xADEB84A5,0xCFCDBB2B,0x3E7D9F68,0xA102B971
};
/* The portion of the inverted matrix corresponding to the free bits.
* reverse_crc() XORs entry i into its working 32-bit vector whenever free
* input bit i is set; one entry per position listed in bitsfree[]. */
unsigned long freematrix[40]={
0x0CBFC054,0xAEAB35B2,0x315B20B2,0x1F113696,
0x6DA65FB4,0x08F3CFCD,0xC0E8FCF1,0xD928FA77,
0x58C085F6,0x55F7A6A4,0x726948CB,0xBEE706A6,
0xDE9BCF28,0x539FADD8,0xA5D7713D,0xA6B4900F,
0x3CA9547B,0xC98AC9B5,0xAF52FA18,0x60098F5B,
0x142D2C51,0x706AA085,0x46494250,0x54026BCE,
0xEBE4D0A3,0x673646B9,0x945A22D6,0x7C5347FB,
0xC61C9B99,0x97780ADB,0x7E9DB1AE,0x88C43E39,
0x55CEBFB3,0x5C81ADC9,0x0F3DD57C,0x3D44BCF3,
0x0383F8DD,0x73F38757,0xA8F2D5CF,0x2922BEA9
};
/* Matrix columns to take into account the canonicalization.
* reverse_crc() starts its working vector from lengthmatrix[length-1], so
* entry 0 belongs to a one-character string and entry 11 to a
* twelve-character string. */
unsigned long lengthmatrix[12]={
0x84741063,0xC5273406,0xE5A222DF,0x9941CB2B,
0xD9EBE522,0xCB93A8AF,0x962E3D2D,0x90029144,
0x5B298B04,0x575F1D8A,0x78EE4BEC,0x47B6B86A
};
/* The Cyber Patrol blocking categories.
* Indexed by bit number of a 16-bit blocking mask: print_blockmask_key()
* prints category[n] when bit n (bit 0 = least significant) is set. */
char *category[16]={
"Violence / Profanity",
"Partial Nudity",
"Full Nudity",
"Sexual Acts / Text",
"Gross Depictions / Text",
"Intolerance",
"Satanic or Cult",
"Drugs / Drug Culture",
"Militant / Extremist",
"Sex Education",
"Questionable / Illegal & Gambling",
"Alcohol & Tobacco",
"Reserved 4",
"Reserved 3",
"Reserved 2",
"Reserved 1"
};
/* This indicates the "score" for each possible character value. First
* 32 entries are for characters 32 to 63, second 32 entries are for
* characters 96 to 127. The way this works is that unexpected characters
* get higher scores and so are less likely to be chosen... this gives us
* that little bit of extra guidance to help find good reverse CRCs.
* The baseline is that an ordinary alphabet character is 10 points.
* Illegal characters count 50, ensuring that they're unlikely to ever be
* chosen. Scores assigned manually, and only semi-systematically.
*
* print_revhash() computes the index as c-32 for c<=64 and c-64 for c>64,
* and keeps the candidate string with the LOWEST total score. Characters
* 64 to 95 have no slot of their own; reverse_crc() never sets bit 5 or
* bit 7 away from the initial 0x20, so it does not produce them.
*/
unsigned cscore[64]={
/* SP ! " # $ % & ' */
50,20,30,20,17,50,50,30,
/* ( ) * + , - . / */
20,20,20,17,20,12,12,50,
/* 0 1 2 3 4 5 6 7 */
14,14,15,15,15,15,15,15,
/* 8 9 : ; < = > ? */
15,14,20,20,50,20,50,30,
/* ` a b c d e f g */
30,7,10,10,9,7,10,10,
/* h i j k l m n o */
10,7,12,10,9,10,9,7,
/* p q r s t u v w */
10,15,9,9,9,7,12,10,
/* x y z { | } ~ DEL */
9,10,12,20,30,20,30,50
};
/***************************************************************************/
/* Data structures */
/* Structures for the hash tables */
/* One entry in the dictionary hash table (chained buckets).
* Entries are created for every URL hash seen in cyber.not; `word` stays
* NULL until a dictionary word or a CRC reversal supplies a string. */
typedef struct _DICTHASH_ENT {
struct _DICTHASH_ENT *next; /* next entry in the same bucket, or NULL */
unsigned long hash; /* URL hash value as stored in cyber.not */
char *word; /* malloc'd text that hashes to `hash`, or NULL */
} DICTHASH_ENT;
/* One entry in the IP address hash table (chained buckets), mapping an
* address to the text printed for it. */
typedef struct _IPHASH_ENT {
struct _IPHASH_ENT *next; /* next entry in the same bucket, or NULL */
unsigned long ip; /* address; first octet is in the low 8 bits */
char *name; /* malloc'd host name or dotted-quad text */
} IPHASH_ENT;
/* Linked list of blocking masks, for key printing.
* add_blockmask() keeps the list sorted by ascending mask with no
* duplicates. */
typedef struct _BLOCKING_MASK {
struct _BLOCKING_MASK *next; /* next (larger) mask, or NULL */
unsigned short mask; /* category bit mask; bit n selects category[n] */
} BLOCKING_MASK;
/* Global vars */
/* Decrypted contents of the cyber.not file, filled in by load_cyber_not() */
char *cyber_not;
/* Number of bytes in cyber_not */
long cyber_not_size;
/* Dictionary hash table: DICTHASH_SIZE bucket heads, allocated by
* find_hashes_to_reverse() */
DICTHASH_ENT **dicthash;
/* IP address hash table: IPHASH_SIZE bucket heads, allocated by
* load_iphints() */
IPHASH_ENT **iphash;
/* Head of the sorted list of blocking masks for the key currently being
* collected */
BLOCKING_MASK *masks=NULL;
#ifdef REVERSE_DNS
/* Time the program started; reverse lookups stop TIME_LIMIT seconds later */
time_t start_time;
#endif
/*************************************************************************/
/* Utility functions */
/* Encryption used to conceal the config files, and the deputy password.
 *
 * Transforms `length` bytes of `data` in place. A one-byte running state is
 * seeded with the low 8 bits of `length`; for every byte the state is
 * rotated right by one bit, XORed with the byte, and written back in place
 * of that byte. The whole buffer is processed twice, and the state carries
 * over from the first pass into the second. A length of zero (or less)
 * leaves the buffer untouched.
 */
void cpcrypt4(char *data,long length) {
    unsigned char *bytes=(unsigned char *)data;
    unsigned char state=(unsigned char)(length&0xFF);
    int pass;
    long pos;

    for (pass=0;pass<2;pass++) {
        for (pos=0;pos<length;pos++) {
            /* 8-bit rotate right by one */
            state=(unsigned char)((state>>1)|(state<<7));
            state^=bytes[pos];
            bytes[pos]=state;
        }
    }
}
/* The slightly nonstandard CRC32 used for URL hashing.
 *
 * Hashes the first `length` bytes of `input` (no terminator needed) and
 * returns the result. Differences from a textbook CRC32 visible here: the
 * register starts at 0, there is no final inversion, and every input byte
 * has bit 0x20 forced on before it is mixed in (so 'A' and 'a' hash alike).
 *
 * Each byte is converted to unsigned char before it is used in the table
 * index. Plain char may be signed, and a negative value would otherwise
 * produce an index far outside crctable[].
 */
unsigned long forward_crc(char *input,int length) {
    unsigned long rval=0;
    int i;

    for (i=0;i<length;i++) {
        unsigned char folded=(unsigned char)((unsigned char)input[i]|0x20);

        rval=(rval>>8)^crctable[(rval&0xFF)^folded];
    }
    return rval;
}
/* Bit-vector helpers. `p` is a byte array and `b` a bit number; bit b lives
 * in byte b>>3 at position b&7 (least significant bit first).
 *
 * GETBIT   - evaluates to 0 or 1, the value of bit b.
 * FLIPBIT  - toggles bit b.
 * FLIPBITR - toggles bit b counting bytes from the END of a string: byte 0
 *            of the bit numbering is p[length-1]. It relies on a variable
 *            named `length` being in scope at the point of use, and does
 *            nothing when the byte would fall before the start of the
 *            string. It is wrapped in do { } while (0) so that
 *            "FLIPBITR(...);" behaves as one statement, including directly
 *            before an `else`.
 */
#define GETBIT(p,b) ((((p)[(b)>>3])>>((b)&7))&1)
#define FLIPBIT(p,b) ((p)[(b)>>3]^=(1<<((b)&7)))
#define FLIPBITR(p,b) do { if (length-1-((b)>>3)>=0) \
    (p)[length-1-((b)>>3)]^=(1<<((b)&7)); } while (0)
/* attempt to reverse the CRC32 function
 *
 * crc    - target hash value (32 significant bits)
 * length - number of characters to produce; indexes lengthmatrix[] and
 *          freebits[] as length-1, so it must be in 1..12
 * in     - bit vector of freely chosen bits; the first freebits[length-1]
 *          bits are read
 * out    - receives exactly `length` characters, NOT NUL-terminated
 *
 * A 32-bit vector is built by XORing lengthmatrix[length-1] with the
 * crcmatrix[] rows selected by the set bits of `crc` and the freematrix[]
 * rows selected by the set free bits. The output starts as all 0x20 bytes;
 * bits are then toggled at the positions given by bitsforced[] (for the
 * vector) and bitsfree[] (for the free bits). The caller is expected to
 * verify the result with forward_crc().
 *
 * The bit masks are formed with 1UL rather than 1: shifting a signed int
 * left by 31 is undefined behaviour.
 */
void reverse_crc(unsigned long crc,int length,char *in,char *out) {
    unsigned long bits;
    int nfree=freebits[length-1];
    int i;

    /* correct for output length */
    bits=lengthmatrix[length-1];

    /* XOR in the CRC */
    for (i=0;i<32;i++) {
        if (crc&(1UL<<i))
            bits^=crcmatrix[i];
    }

    /* XOR in the free bits */
    for (i=0;i<nfree;i++) {
        if (GETBIT(in,i))
            bits^=freematrix[i];
    }

    /* set up output */
    for (i=0;i<length;i++)
        out[i]=0x20;

    /* output forced bits (FLIPBITR uses the local variable `length`) */
    for (i=0;i<32;i++) {
        if (bits&(1UL<<i)) {
            FLIPBITR(out,bitsforced[i]);
        }
    }

    /* output free bits */
    for (i=0;i<nfree;i++) {
        if (GETBIT(in,i)) {
            FLIPBITR(out,bitsfree[i]);
        }
    }
}
/* Try one candidate string against the hashes collected from cyber.not.
 *
 * The first `length` bytes of `word` are hashed with forward_crc(). If the
 * dictionary table holds an entry with that hash and the entry has no text
 * yet, a NUL-terminated copy of the candidate is stored in it.
 *
 * Returns 1 when the candidate was stored, 0 when the hash is unknown or
 * the entry already had a word. Exits the program if memory runs out.
 */
int guess_word(char *word,int length) {
    unsigned long hash=forward_crc(word,length);
    DICTHASH_ENT *ent=dicthash[hash%DICTHASH_SIZE];

    /* walk the bucket chain looking for this hash */
    while (ent && ent->hash!=hash)
        ent=ent->next;

    /* unknown hash, or a word was already recorded for it */
    if (!ent || ent->word)
        return 0;

    ent->word=(char *)malloc(length+1);
    if (!ent->word) {
        puts("ERROR - out of memory (dicthash entry)");
        exit(1);
    }
    memcpy(ent->word,word,length);
    ent->word[length]='\0';
    return 1;
}
/* Free every node of the global blocking-mask list and leave `masks`
 * empty (NULL), ready for a new key to be collected. */
void clear_blockmask_key(void) {
    BLOCKING_MASK *node,*following;

    for (node=masks;node;node=following) {
        following=node->next; /* read the link before the node is freed */
        free(node);
    }
    masks=NULL;
}
/* Insert `newmask` into the global `masks` list, which is kept sorted by
 * ascending mask value, unless an entry with that value is already there.
 * The search is a linear walk; the list only ever holds a few tens of
 * entries, so nothing fancier is needed. Exits the program if memory runs
 * out. */
void add_blockmask(unsigned short newmask) {
    BLOCKING_MASK **link=&masks;
    BLOCKING_MASK *node;

    /* advance to the first entry that is not smaller than the new mask */
    while (*link && (*link)->mask<newmask)
        link=&(*link)->next;

    /* already on the list: nothing to add */
    if (*link && (*link)->mask==newmask)
        return;

    node=(BLOCKING_MASK *)malloc(sizeof(BLOCKING_MASK));
    if (!node) {
        puts("ERROR - out of memory (blocking mask entry)");
        exit(1);
    }
    /* splice the new node in ahead of the entry we stopped at */
    node->mask=newmask;
    node->next=*link;
    *link=node;
}
/* Print a legend for every mask on the global `masks` list.
 *
 * Each mask gets one entry: the mask as four hex digits and a colon,
 * followed by the comma-separated names of the categories whose bits are
 * set (bit n selects category[n]). A running column count is kept; when the
 * next name would take the line past column 71, a line break is emitted
 * instead of the separating space.
 */
void print_blockmask_key(void) {
    BLOCKING_MASK *entry;
    unsigned short rest;
    const char *label;
    size_t column,labellen;
    int bit;

    for (entry=masks;entry;entry=entry->next) {
        printf("%04X: ",entry->mask);
        column=6;
        /* `rest` holds the bits not yet examined; bit 0 of `rest` is
         * category number `bit` */
        for (bit=0,rest=entry->mask;rest!=0;bit++,rest>>=1) {
            if (!(rest&1))
                continue;
            label=category[bit];
            labellen=strlen(label);
            if (column+labellen>71) {
                printf("\n ");
                column=7;
            } else {
                putchar(' ');
                column++;
            }
            printf("%s",label);
            column+=labellen;
            /* a comma follows unless this was the highest set bit */
            if (rest>>1) {
                putchar(',');
                column++;
            }
        }
        putchar('\n');
    }
}
/* print a pretty IP address, with reverse lookup if we're allowed
 *
 * `ip` holds the address with its first octet in the low 8 bits. The
 * function prints, in order of preference:
 *   1. the name already cached in the IP hash table;
 *   2. (REVERSE_DNS builds, while the time limit has not expired) the
 *      result of a reverse lookup, or the dotted quad if the lookup fails.
 *      Either way the text is added to the table so it is not looked up
 *      again and gets saved by save_iphints();
 *   3. the plain dotted quad.
 * No newline is printed. Exits the program if memory runs out.
 *
 * Every octet is masked to 8 bits and printed with %lu. Where unsigned long
 * is wider than 32 bits an unmasked top "octet" could exceed 255 and
 * overflow the 16-byte buffer, which is sized for "255.255.255.255" plus
 * the terminator.
 */
void print_ip(unsigned long ip) {
    IPHASH_ENT *tmp;
    unsigned long oct[4];

    oct[0]=ip&0xFF;
    oct[1]=(ip>>8)&0xFF;
    oct[2]=(ip>>16)&0xFF;
    oct[3]=(ip>>24)&0xFF;

    /* check if it's already in the table */
    for (tmp=iphash[ip%IPHASH_SIZE];
         tmp && (tmp->ip!=ip);
         tmp=tmp->next)
        ;
    if (tmp) /* if so, just print that */
        printf("%s",tmp->name);
#ifdef REVERSE_DNS
    /* if we're allowed a reverse lookup, take it */
    else if (time(NULL)<start_time+TIME_LIMIT) {
        struct hostent *he;
        char *name;

        /* NOTE(review): passes the first 4 bytes of `ip` as stored in
         * memory, which matches the octet order only on little-endian
         * hosts, as the file's compiling notes assume */
        he=gethostbyaddr((char *)&ip,4,AF_INET);
        tmp=(IPHASH_ENT *)malloc(sizeof(IPHASH_ENT));
        name=(char *)malloc(he?strlen(he->h_name)+1:16);
        if (!tmp || !name) {
            puts("ERROR - out of memory (IP hash ent)");
            exit(1);
        }
        tmp->next=iphash[ip%IPHASH_SIZE];
        iphash[ip%IPHASH_SIZE]=tmp;
        tmp->ip=ip;
        tmp->name=name;
        if (he)
            strcpy(name,he->h_name);
        else /* at most 15 characters now that each octet is <= 255 */
            sprintf(name,"%lu.%lu.%lu.%lu",oct[0],oct[1],oct[2],oct[3]);
        printf("%s",name);
    }
#endif
    else /* finally, we just print it out numerically */
        printf("%lu.%lu.%lu.%lu",oct[0],oct[1],oct[2],oct[3]);
}
/* reverse a hash and print the results
 *
 * Looks `hash` up in the dictionary table (it must be there, since
 * find_hashes_to_reverse() registered every hash in the file) and prints
 * the text stored for it, without a newline.
 *
 * If no text is stored yet, one is synthesised and stored. For every length
 * from 1 to REVERSE_CRC_LENGTH, and for every setting of the free bits
 * available at that length, reverse_crc() proposes a string. Proposals that
 * really hash back to `hash` are scored with cscore[] and the lowest
 * scoring one wins. If nothing verifies, the text stays "?UNKNOWN?".
 *
 * The result buffer is sized from the candidate buffer (12 characters, the
 * most the lookup tables support) plus a terminator, and the loop bound is
 * clamped to that size. The original 12-byte allocation was written one
 * byte past its end if REVERSE_CRC_LENGTH was raised to 12.
 */
void print_revhash(unsigned long hash) {
    DICTHASH_ENT *tmp;
    char *neww,freeb[6],plaintext[12];
    unsigned score,bestscore=(unsigned)-1;
    int length,maxlength,i,bflip;

    /* check if it's already in the table */
    for (tmp=dicthash[hash%DICTHASH_SIZE];
         tmp && (tmp->hash!=hash);
         tmp=tmp->next)
        ;
    if (!tmp) {
        puts("ERROR - reversing unseen hash (should never happen)");
        exit(1);
    }

    if (!tmp->word) { /* if no word, attempt reversal */
        maxlength=REVERSE_CRC_LENGTH;
        if (maxlength>(int)sizeof(plaintext))
            maxlength=(int)sizeof(plaintext);
        neww=(char *)malloc(sizeof(plaintext)+1);
        if (neww==NULL) {
            puts("ERROR - out of memory (new word)\n");
            exit(1);
        }
        tmp->word=neww;
        strcpy(neww,"?UNKNOWN?");
        for (length=1;length<=maxlength;length++) {
            /* start the free-bit counter at zero for this length */
            memset(freeb,0,sizeof(freeb));
            do {
                /* reverse the hash, and see if that worked */
                reverse_crc(hash,length,freeb,plaintext);
                if (hash==forward_crc(plaintext,length)) {
                    /* compute the score for this guess */
                    score=0;
                    for (i=0;i<length;i++)
                        score+=cscore[plaintext[i]-(plaintext[i]>64?64:32)];
                    if (plaintext[0]=='~') /* ~ at start is ignored */
                        score-=cscore['~'-64];
                    /* if this is an improvement, use it */
                    if (score<bestscore) {
                        bestscore=score;
                        memcpy(neww,plaintext,length);
                        neww[length]='\0';
                    }
                }
                /* binary increment of the free-bit vector: clear the
                 * trailing run of 1 bits, then set the next bit. bflip
                 * ends up as the index of the bit that was set; once it
                 * reaches the number of free bits, every combination has
                 * been tried. */
                for (bflip=0;GETBIT(freeb,bflip);bflip++)
                    FLIPBIT(freeb,bflip);
                FLIPBIT(freeb,bflip);
            } while (bflip<freebits[length-1]);
        }
    }

    /* now print whatever word we found */
    printf("%s",tmp->word);
}
/***************************************************************************/
/* Main functional blocks */
/* Load and decrypt cyber.not
 *
 * Reads the whole of `filename` into a freshly allocated buffer, stores the
 * buffer and its size in the globals cyber_not / cyber_not_size, and
 * decrypts it in place with cpcrypt4().
 *
 * Every failure is fatal: the function prints a message and exits. The
 * original printed "can't read" on an allocation or short-read failure but
 * then carried on and decrypted a NULL or partly filled buffer. The file
 * must also be at least 0x2C bytes long, because the table lookups
 * elsewhere in this program read header fields up to offset 0x2B.
 */
void load_cyber_not(char *filename) {
    FILE *datafile;
    size_t wanted,got=0;

    datafile=fopen(filename,"rb");
    if (!datafile) {
        puts("ERROR - can't open cyber.not");
        exit(1);
    }
    /* find the file size by seeking to the end and back */
    if ((fseek(datafile,0,SEEK_END)<0)
        || ((cyber_not_size=ftell(datafile))<0)
        || (fseek(datafile,0,SEEK_SET)<0)) {
        puts("ERROR - can't reposition in cyber.not");
        fclose(datafile);
        exit(1);
    }
    if (cyber_not_size<0x2C) {
        puts("ERROR - cyber.not is too short to hold the table header");
        fclose(datafile);
        exit(1);
    }
    wanted=(size_t)cyber_not_size;
    cyber_not=(char *)malloc(wanted);
    if (cyber_not)
        got=fread(cyber_not,1,wanted,datafile);
    fclose(datafile);
    if ((!cyber_not) || (got<wanted)) {
        puts("ERROR - can't read cyber.not");
        exit(1);
    }
    cpcrypt4(cyber_not,cyber_not_size);
}
/* Initialize the dictionary hash table with the hashes in cyber.not
 *
 * Allocates the global `dicthash` bucket array, then walks Table 1 of the
 * decrypted file and adds one entry (with no word yet) for every distinct
 * URL hash found. Prints the number of distinct hashes. Exits the program
 * if memory runs out.
 *
 * Record layout as this code reads it: a 4-byte IP address and a 2-byte
 * category mask. When the mask is zero, a list of sub-records follows, each
 * a length byte, two more bytes (skipped here) and (length-3)/4 four-byte
 * hashes; the list ends with a zero length byte.
 *
 * The 4-byte fields are memcpy'd into long / unsigned long variables. Those
 * variables are zeroed first, so the bytes memcpy does not write are
 * defined where long is wider than 4 bytes. This still assumes a
 * little-endian host, as the file's compiling notes say.
 */
void find_hashes_to_reverse(void) {
    long table1_start=0,table1_end=0;
    long i,hcnt=0;
    unsigned long hash=0;
    short mask;
    char length;
    DICTHASH_ENT *tmp;

    /* find Table 1 in cyber.not */
    memcpy(&table1_start,cyber_not+0x0010,4); /* Table 1 offset */
    memcpy(&table1_end,cyber_not+0x0014,4);   /* Table 1 length */
    table1_end+=table1_start;
    table1_start+=2; /* "SD" marker */
    table1_end-=2;   /* "ED" marker */

    /* initialize our hash table to empty */
    dicthash=(DICTHASH_ENT **)malloc(DICTHASH_SIZE*sizeof(DICTHASH_ENT *));
    if (!dicthash) {
        puts("ERROR - can't allocate dictionary hash");
        exit(1);
    }
    for (i=0;i<DICTHASH_SIZE;i++)
        dicthash[i]=NULL;

    /* step through the table, looking for hashes */
    for (i=table1_start;i<table1_end;) {
        i+=4;                         /* skip IP address */
        memcpy(&mask,cyber_not+i,2);  /* category mask */
        i+=2;                         /* skip over mask */
        if (mask!=0)
            continue;                 /* whole-site block: no hash records */

        /* we have hash records; a zero length byte ends the list */
        while ((length=cyber_not[i])!=0) {
            i+=3; /* skip length and mask */
            for (length-=3;length>0;length-=4) {
                memcpy(&hash,cyber_not+i,4);
                i+=4;
                for (tmp=dicthash[hash%DICTHASH_SIZE];
                     tmp && (tmp->hash!=hash);
                     tmp=tmp->next)
                    ; /* does this hash exist already? */
                if (!tmp) { /* if not, add it */
                    tmp=(DICTHASH_ENT *)malloc(sizeof(DICTHASH_ENT));
                    if (!tmp) {
                        puts("ERROR - out of memory");
                        exit(1);
                    }
                    tmp->next=dicthash[hash%DICTHASH_SIZE];
                    dicthash[hash%DICTHASH_SIZE]=tmp;
                    tmp->hash=hash;
                    tmp->word=NULL;
                    hcnt++;
                }
            }
        }
        i++; /* skip terminating length */
    }
    printf("Scanning cyber.not, found %ld unique hash values\n",hcnt);
}
/* Read the next usable word from a dictionary file into `word`, which must
 * have room for LINELEN bytes. Lines containing '#' or a space are skipped.
 * The trailing newline (and carriage return, if any) is removed only when
 * it is actually there, so a final line without a newline keeps its last
 * letter. The word is lower-cased and NUL-terminated.
 * Returns the word length (possibly 0 for a blank line), or -1 at end of
 * file. */
static int next_dictionary_word(FILE *datafile,char *word) {
    size_t length,i;

    while (fgets(word,LINELEN,datafile)) {
        if (strchr(word,'#') || strchr(word,' '))
            continue;
        length=strlen(word);
        while (length>0 && (word[length-1]=='\n' || word[length-1]=='\r'))
            length--;
        word[length]='\0';
        /* <ctype.h> functions need an unsigned char value */
        for (i=0;i<length;i++)
            word[i]=(char)tolower((unsigned char)word[i]);
        return (int)length;
    }
    return -1;
}

/* load a dictionary file
 *
 * Reads `filename` three times and offers variations of every word to
 * guess_word(), which records those whose hash occurs in cyber.not:
 *   pass 1: word, ~word, word.htm, word.html
 *   pass 2: Xword, ~Xword, wordX, ~wordX for X in a-z and 0-9, plus xwordx
 *   pass 3: wordX.htm, wordX.html, Xword.htm, Xword.html
 * Prints how many new words were recorded. A missing file is reported but
 * is not an error.
 *
 * Buffer layout: textline[0] is always '~'. Pass 1 puts the word at
 * textline+1. Passes 2 and 3 put it at textline+2 so that textline[1] can
 * hold the prefix character. guess_word() takes explicit lengths, so the
 * variants are prefixes or substrings of one buffer. The LINELEN+8 size
 * leaves room for the two leading bytes, a suffix character, ".html" and
 * the terminator after a maximal LINELEN-1 character word.
 */
void load_dictionary(char *filename) {
    static const char affixes[]="abcdefghijklmnopqrstuvwxyz0123456789";
    FILE *datafile;
    char textline[LINELEN+8],x;
    const char *affix;
    int length,newwords=0;

    datafile=fopen(filename,"rt");
    if (!datafile) {
        /* not an error! the dictionary need not exist */
        printf("Dictionary file %s missing.\n",filename);
        return;
    }

    /* First pass: word, ~word, word.htm, word.html */
    textline[0]='~';
    while ((length=next_dictionary_word(datafile,textline+1))>=0) {
        strcat(textline+1,".html");
        newwords+=guess_word(textline+1,length);   /* word */
        newwords+=guess_word(textline,length+1);   /* ~word */
        newwords+=guess_word(textline+1,length+4); /* word.htm */
        newwords+=guess_word(textline+1,length+5); /* word.html */
    }

    /* Second pass: Xword, ~Xword, wordX, ~wordX, xwordx */
    fseek(datafile,0,SEEK_SET);
    while ((length=next_dictionary_word(datafile,textline+2))>=0) {
        for (affix=affixes;*affix;affix++) {
            x=*affix;
            textline[1]=x;
            textline[length+2]=x;
            newwords+=guess_word(textline+1,length+1); /* Xword */
            newwords+=guess_word(textline,length+2);   /* ~Xword */
            newwords+=guess_word(textline+2,length+1); /* wordX */
            if (x=='x')
                newwords+=guess_word(textline+1,length+2); /* xwordx */
            textline[1]='~';
            newwords+=guess_word(textline+1,length+2); /* ~wordX */
        }
    }

    /* Third pass: .htm and .html variants of second pass */
    fseek(datafile,0,SEEK_SET);
    while ((length=next_dictionary_word(datafile,textline+2))>=0) {
        for (affix=affixes;*affix;affix++) {
            x=*affix;
            textline[1]=x;
            textline[length+2]=x;
            textline[length+3]='\0';
            strcat(textline+2,".html");
            newwords+=guess_word(textline+2,length+5); /* wordX.htm */
            newwords+=guess_word(textline+2,length+6); /* wordX.html */
            textline[length+2]='\0';
            strcat(textline+2,".html");
            newwords+=guess_word(textline+1,length+5); /* Xword.htm */
            newwords+=guess_word(textline+1,length+6); /* Xword.html */
        }
    }

    fclose(datafile);
    printf("Found %d new words in %s\n",newwords,filename);
}
/* Write every word currently stored in the dictionary hash table to
 * `filename`, one per line, replacing the file's previous contents.
 * Entries that still have no word are skipped. Failure to open the file is
 * reported but does not stop the program. */
void save_dictionary(char *filename) {
    FILE *out=fopen(filename,"wt");
    DICTHASH_ENT *ent;
    long bucket;

    if (out==NULL) {
        /* this error is not fatal */
        printf("ERROR - cannot write dictionary %s (non-fatal)\n",filename);
        return;
    }

    for (bucket=0;bucket<DICTHASH_SIZE;bucket++) {
        ent=dicthash[bucket];
        while (ent) {
            if (ent->word)
                fprintf(out,"%s\n",ent->word);
            ent=ent->next;
        }
    }

    fclose(out);
}
/* Print the newsgroup block list (Table 3 of cyber.not), then a key
 * explaining every blocking mask that occurred.
 *
 * Record layout as this code reads it: a length byte, a 2-byte category
 * mask, then length-3 bytes of text, which are printed as they are.
 *
 * Two fixes over the original:
 *  - the table offset and length are memcpy'd (4 bytes) into variables that
 *    are zeroed first, so their upper bytes are defined where long is wider
 *    than 4 bytes (little-endian host still assumed, as the file's
 *    compiling notes say);
 *  - the mask is held in an unsigned short, so a mask with its top bit set
 *    prints as four hex digits instead of being sign-extended by "%04X".
 */
void dump_newsgroup_blocks(void) {
    long table3_start=0,table3_end=0;
    long i;
    unsigned short mask;
    char length;

    puts("*** NEWSGROUP BLOCKS ***\n");

    /* find Table 3 in cyber.not */
    memcpy(&table3_start,cyber_not+0x0024,4); /* Table 3 offset */
    memcpy(&table3_end,cyber_not+0x0028,4);   /* Table 3 length */
    table3_end+=table3_start;
    table3_start+=2; /* "SD" marker */
    table3_end-=2;   /* "ED" marker */

    /* get ready to print a new blocking mask key */
    clear_blockmask_key();

    /* step through the table, looking for newsgroups */
    for (i=table3_start;i<table3_end;) {
        length=cyber_not[i];
        i+=1;                        /* skip length byte */
        memcpy(&mask,cyber_not+i,2); /* category mask */
        i+=2;                        /* skip over mask */
        add_blockmask(mask);
        printf("%04X ",(unsigned)mask);
        for (length-=3;length>0;length--) {
            putchar(cyber_not[i]);
            i++;
        }
        putchar('\n');
    }

    /* print the key */
    putchar('\n');
    print_blockmask_key();
    putchar('\n');
}
/* Load a file of reverse-lookup hints. Just initializes the table if
 * the filename parameter is null.
 *
 * Always allocates and empties the global `iphash` bucket array. When a
 * file name is given, each line of the form "a.b.c.d<whitespace>name" adds
 * an entry mapping that address to that name. Prints the number of entries
 * loaded. A missing file is reported but is not an error. Exits the
 * program if memory runs out.
 *
 * Fixes over the original, which called fscanf() in a feof() loop without
 * checking its result:
 *  - an empty file no longer produces an entry built from uninitialised
 *    variables;
 *  - a line that does not parse is skipped, where before the loop could
 *    spin forever without consuming any input;
 *  - the name cannot overflow its buffer, because it is scanned out of a
 *    line buffer of the same size;
 *  - octets outside 0..255 are rejected, and the address is assembled with
 *    unsigned arithmetic.
 */
void load_iphints(char *filename) {
    FILE *datafile;
    char line[LINELEN+32],name[LINELEN+32],*ntmp;
    int i,ipa,ipb,ipc,ipd,count=0;
    unsigned long ip;
    IPHASH_ENT *tmp;

    /* initialize our hash table to empty */
    iphash=(IPHASH_ENT **)malloc(IPHASH_SIZE*sizeof(IPHASH_ENT *));
    if (!iphash) {
        puts("ERROR - can't allocate IP hash");
        exit(1);
    }
    for (i=0;i<IPHASH_SIZE;i++)
        iphash[i]=NULL;

    if (filename==NULL)
        return;
    datafile=fopen(filename,"rt");
    if (!datafile) {
        /* not an error! the hints file need not exist */
        printf("IP address file %s missing.\n",filename);
        return;
    }

    while (fgets(line,sizeof(line),datafile)) {
        /* all five fields must be present, or the line is ignored */
        if (sscanf(line,"%d.%d.%d.%d %s",&ipa,&ipb,&ipc,&ipd,name)!=5)
            continue;
        if (ipa<0 || ipa>255 || ipb<0 || ipb>255
            || ipc<0 || ipc>255 || ipd<0 || ipd>255)
            continue;
        /* first octet goes in the low byte, as print_ip() expects */
        ip=(unsigned long)ipa
            |((unsigned long)ipb<<8)
            |((unsigned long)ipc<<16)
            |((unsigned long)ipd<<24);
        tmp=(IPHASH_ENT *)malloc(sizeof(IPHASH_ENT));
        ntmp=(char *)malloc(strlen(name)+1);
        if ((!tmp) || (!ntmp)) {
            puts("ERROR - out of memory (IP hash entry)");
            exit(1);
        }
        strcpy(ntmp,name);
        tmp->ip=ip;
        tmp->name=ntmp;
        tmp->next=iphash[ip%IPHASH_SIZE];
        iphash[ip%IPHASH_SIZE]=tmp;
        count++;
    }

    fclose(datafile);
    printf("Found %d IP addresses in %s\n",count,filename);
}
/* save reverse-lookup hints file
 *
 * Writes every entry of the IP hash table to `filename` as
 * "a.b.c.d<TAB>name", one per line, replacing the file's previous contents.
 * Failure to open the file is reported but does not stop the program.
 *
 * The octets are printed with %lu, the conversion that matches their
 * unsigned long type, and the top octet is masked like the other three so
 * that each field is always in 0..255.
 */
void save_iphints(char *filename) {
    FILE *datafile;
    long i;
    IPHASH_ENT *tmp;

    /* open */
    datafile=fopen(filename,"wt");
    if (!datafile) {
        /* this error is not fatal */
        printf("ERROR - cannot write IP hints %s (non-fatal)\n",filename);
        return;
    }

    /* write */
    for (i=0;i<IPHASH_SIZE;i++) {
        for (tmp=iphash[i];tmp;tmp=tmp->next) {
            fprintf(datafile,"%lu.%lu.%lu.%lu\t%s\n",
                    tmp->ip&0xFF,(tmp->ip>>8)&0xFF,
                    (tmp->ip>>16)&0xFF,(tmp->ip>>24)&0xFF,
                    tmp->name);
        }
    }

    /* close */
    fclose(datafile);
}
/* OK, this is the good part
 *
 * Prints the web block list. For every record in Table 1 it prints:
 *   - a separator line and the site's address (via print_ip());
 *   - every synonym address that Table 2 lists for that address, each
 *     prefixed with "= ";
 *   - either "ENTIRE SITE:" when the record's mask is non-zero, or one line
 *     per blocked URL showing its mask and its path components, each
 *     recovered with print_revhash();
 *   - a key for the masks used in that record.
 *
 * Table 2 record layout as this code reads it: a 4-byte address, a count
 * byte, then that many 4-byte addresses. The Table 1 layout is the one
 * described by the parsing code below.
 *
 * Fixes over the original:
 *  - every variable that receives a 4-byte memcpy is zeroed first, so its
 *    upper bytes are defined where long is wider than 4 bytes
 *    (little-endian host still assumed, as the file's compiling notes say);
 *  - the mask is unsigned, so "%04X" cannot sign-extend it;
 *  - the octets of the URL are printed with %lu and all four are masked.
 */
void dump_web_blocks(void) {
    long table1_start=0,table1_end=0,table2_start=0,table2_end=0;
    long i,j;
    unsigned long ip=0,hash=0;
    unsigned short mask;
    char length;

    /* find Table 1 in cyber.not */
    memcpy(&table1_start,cyber_not+0x0010,4); /* Table 1 offset */
    memcpy(&table1_end,cyber_not+0x0014,4);   /* Table 1 length */
    table1_end+=table1_start;
    table1_start+=2; /* "SD" marker */
    table1_end-=2;   /* "ED" marker */

    /* find Table 2 in cyber.not */
    memcpy(&table2_start,cyber_not+0x001A,4); /* Table 2 offset */
    memcpy(&table2_end,cyber_not+0x001E,4);   /* Table 2 length */
    table2_end+=table2_start;
    table2_start+=2; /* "SD" marker */
    table2_end-=2;   /* "ED" marker */

    /* step through the table 1, printing out the blocks*/
    for (i=table1_start;i<table1_end;) {
        puts("************************************"
             "************************************");
        clear_blockmask_key();
        memcpy(&ip,cyber_not+i,4);
        i+=4;                        /* skip IP address */
        memcpy(&mask,cyber_not+i,2); /* category mask */
        i+=2;                        /* skip over mask */

        /* print the IP address and synonyms */
        putchar(' ');
        putchar(' ');
        print_ip(ip);
        putchar('\n');
        /* Table 2 is rescanned from its start for every site */
        for (j=table2_start;j<table2_end;) {
            memcpy(&hash,cyber_not+j,4);
            j+=4;
            length=cyber_not[j++];
            if (hash==ip) {
                for (;length>0;length--) {
                    memcpy(&hash,cyber_not+j,4);
                    j+=4;
                    putchar('=');
                    putchar(' ');
                    print_ip(hash);
                    putchar('\n');
                }
            } else
                j+=(length*4); /* not ours: skip its synonym list */
        }

        /* print the individual blocks */
        if (mask==0) {
            putchar('\n');
            /* a zero length byte ends the list of URL records */
            while ((length=cyber_not[i])!=0) {
                i++;                         /* skip length */
                memcpy(&mask,cyber_not+i,2); /* category mask */
                i+=2;                        /* skip over mask */
                add_blockmask(mask);
                printf("%04X http://%lu.%lu.%lu.%lu/",(unsigned)mask,
                       ip&0xFF,(ip>>8)&0xFF,(ip>>16)&0xFF,(ip>>24)&0xFF);
                /* the rest of the record is one 4-byte hash per path
                 * component */
                for (length-=3;length>0;length-=4) {
                    memcpy(&hash,cyber_not+i,4);
                    i+=4;
                    print_revhash(hash);
                    putchar('/');
                }
                putchar('\n');
            }
            i++; /* skip terminating length */
            putchar('\n');
        } else { /* block on entire site */
            add_blockmask(mask);
            puts("ENTIRE SITE:");
        }
        print_blockmask_key();
    }
}
/**************************************************************************/
/* Main program */
/* Program entry point.
 *
 * Usage: cndecode cyber.not [dictionary [suppdict [iplist]]]
 *
 * Loads and decrypts the cyber.not file, collects its URL hashes, tries the
 * supplemental dictionary and then the main dictionary against them, loads
 * the IP hints, and prints the newsgroup and web block lists. Afterwards
 * the supplemental dictionary and the IP hints file are rewritten with
 * whatever is now in the tables. Returns 1 if no cyber.not file was named,
 * 0 otherwise.
 */
int main(int argc,char **argv) {
    char *main_dict,*supp_dict,*ip_hints;

    puts("cndecode - Cyber Patrol 4 cyber.not decoder");
    puts("By Matthew Skala\n");
#ifdef REVERSE_DNS
    start_time=time(NULL);
#endif
    if (argc<2) {
        puts("ERROR - no cyber.not file specified");
        return 1;
    }

    /* optional arguments; NULL means "not given" */
    main_dict=(argc>=3)?argv[2]:NULL;
    supp_dict=(argc>=4)?argv[3]:NULL;
    ip_hints=(argc>=5)?argv[4]:NULL;

    load_cyber_not(argv[1]);
    find_hashes_to_reverse();

    /* the supplemental list is tried before the main dictionary */
    if (supp_dict)
        load_dictionary(supp_dict);
    if (main_dict)
        load_dictionary(main_dict);

    /* a NULL name still sets up the (empty) IP table */
    load_iphints(ip_hints);

    dump_newsgroup_blocks();
    dump_web_blocks();

    if (supp_dict)
        save_dictionary(supp_dict);
    if (ip_hints)
        save_iphints(ip_hints);
    return 0;
}
# distributed via <nettime>: no commercial use without permission
# <nettime> is a moderated mailing list for net criticism,
# collaborative text filtering and cultural politics of the nets
# more info: majordomo@bbs.thing.net and "info nettime-l" in the msg body
# archive: http://www.nettime.org contact: nettime@bbs.thing.net